-- This script counts the failure causes of events, with events categorized by duration (short, medium, long).

-- author: cristina

-- Register the jar that presumably supplies the RealLabels UDF used below
-- (TODO confirm: the function is referenced without a package prefix or DEFINE).
REGISTER failurecausefunc.jar;

-- Read the tab-separated event trace. Only event_end_reason has a declared
-- type; every other column is loaded without one.
events = LOAD '$inputDir/event_trace.tab' USING PigStorage('\t') AS (
    event_id,
    component_id,
    node_id,
    platform_id,
    node_name,
    event_type,
    event_start_time,
    event_stop_time,
    event_end_reason:chararray
);

-- For each event keep its elapsed time (stop minus start) and its end reason.
timed = FOREACH events GENERATE
    event_stop_time - event_start_time AS dur,
    event_end_reason AS fault;

-- Turn every (dur, fault) pair into a label via the RealLabels UDF and attach
-- a constant 1 so that occurrences can be summed per label afterwards.
labeled = FOREACH timed GENERATE
    FLATTEN(RealLabels(dur, fault)) AS key,
    1 AS one;

-- NOTE: labels that come back null are not filtered out; a filter for them
-- existed here but is disabled. If it is reinstated, the null test has to be
-- written as `key IS NOT NULL` (comparing with `!= NULL` is not valid in Pig).

-- Count the occurrences of each label: group by label, then add up the 1s.
by_key = GROUP labeled BY key;
totals = FOREACH by_key GENERATE
    group,
    SUM(labeled.one) AS total;

-- Keep only the labels that occur more than 1000 times.
frequent = FILTER totals BY total > 1000;

-- Write the (label, count) rows using PigStorage's default settings.
STORE frequent INTO 'failurecause.rez' USING PigStorage();
